import scrapy


class LianxiSpider(scrapy.Spider):
    """Scrape the text of every quote on quotes.toscrape.com.

    Starts at the site root and follows the "Next" pagination link so the
    whole site is crawled, yielding one item per quote: ``{'text': <str>}``.
    """

    name = "lianxi"
    # Restrict the crawl to the target site so followed links can't wander off-domain.
    allowed_domains = ["quotes.toscrape.com"]
    start_urls = ["https://quotes.toscrape.com"]

    def parse(self, response):
        """Yield each quote's text on this page, then follow the next page.

        :param response: the downloaded page (scrapy Response).
        :yields: dicts of the form ``{'text': quote_text}`` and, when a
            "Next" link exists, a follow-up Request for the next page.
        """
        # Each quote sits in a div.quote inside the main column; the quote
        # body is the first <span> of that div.
        texts = response.xpath(
            '//div[@class="col-md-8"][1]/div[@class="quote"]/span[1]/text()'
        ).getall()
        for text in texts:
            yield {'text': text}

        # Follow pagination. The original extracted this href but never used
        # it, so only the first page was scraped. response.follow resolves
        # the relative href against the current URL; its default callback is
        # this same parse method, so every page is handled identically.
        next_href = response.xpath('//li[@class="next"][1]/a/@href').get()
        if next_href:
            yield response.follow(next_href, callback=self.parse)